{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {
    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# This Python 3 environment comes with many helpful analytics libraries installed\n",
    "# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python\n",
    "# For example, here's several helpful packages to load in \n",
    "\n",
    "import numpy as np # linear algebra\n",
    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
    "\n",
    "# Input data files are available in the \"../input/\" directory.\n",
    "# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory\n",
    "\n",
    "import os\n",
    "\n",
    "# Any results you write to the current directory are saved as output."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {
    "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
    "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a",
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train = pd.read_csv(\"../data/dataset/train.csv\").fillna(\"no_comments\")\n",
    "test = pd.read_csv(\"../data/dataset/test.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {
    "_uuid": "738b9c2aafa2e019369597094321ef4a8f9957bc",
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train = train[train.label != 'unrelated']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {
    "_uuid": "ef69dc9e27f91bbfe22354ac5d1b5a58f2619fda",
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train['spn_1_hash'] = train['title1_zh']\n",
    "train['spn_2_hash'] = train['title2_zh']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "_uuid": "580a4ad9824fd9db80d56f9f681e75e1998e719f",
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train['label'] = (train['label'] != 'disagreed').astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {
    "_uuid": "0d4636e0264345a10697dc4dcb7d918fdfeea708"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    92973\n",
       "0     8266\n",
       "Name: label, dtype: int64"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train['label'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def same_order(a, b):\n",
    "    return a, b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {
    "_uuid": "9d1a76eb26ff082ffef146997df56e670fd2fcd8",
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from collections import defaultdict\n",
    "graph = defaultdict(set)\n",
    "pos_edges = set()\n",
    "neg_edges = set()\n",
    "edges = set()\n",
    "\n",
    "def build_graph(row):\n",
    "    node_1 = row['spn_1_hash']\n",
    "    node_2 = row['spn_2_hash']\n",
    "    label = row['label']\n",
    "        \n",
    "    graph[node_2].add(node_1) # node_2 must connect to node_1\n",
    "\n",
    "    if label:\n",
    "        # in this case, it's bidirectional\n",
    "        pos_edges.add(same_order(node_1, node_2))\n",
    "        pos_edges.add(same_order(node_2, node_1))\n",
    "        graph[node_1].add(node_2)\n",
    "    else:\n",
    "        neg_edges.add(same_order(node_2, node_1))\n",
    "    edges.add(same_order(node_2, node_1))\n",
    "    \n",
    "n = train[['spn_1_hash', 'spn_2_hash', 'label']].apply(build_graph, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "pos_augments = set()\n",
    "neg_augments = set()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {
    "_uuid": "a2dc33a2e9639ade377ba45fe12de1ffe0825995",
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def add_pos_edges():\n",
    "    counter = 0\n",
    "    ncc = 0\n",
    "    pos_counter = 0\n",
    "    tricky_pairs = set()\n",
    "    # TRIANGLE CASE, A->B and B->C and A->C, if A=B, will A=C and B=C ?\n",
    "    for src, dst in pos_edges:\n",
    "        src_point_to = graph[src]\n",
    "        dst_point_to = graph[dst]\n",
    "        src_dst_both_point_to = src_point_to.intersection(dst_point_to) # A point to C, and also B point to C\n",
    "\n",
    "        if len(src_dst_both_point_to) == 0:\n",
    "            ncc += 1\n",
    "\n",
    "        for v in src_dst_both_point_to:\n",
    "            if (dst, v) in pos_edges:\n",
    "                if (src, v) in pos_edges:\n",
    "                    pos_counter += 1\n",
    "                else:\n",
    "                    print(\"POS-Tricky\", src, \"|\", v)\n",
    "                    tricky_pairs.add(same_order(src, v))\n",
    "                counter += 1\n",
    "    print(\"pos_counter\", pos_counter, \"counter\", counter)\n",
    "    print(\"Triangle case\", pos_counter / counter)\n",
    "    print(\"NCC\", ncc)\n",
    "    \n",
    "def add_neg_edges():\n",
    "    counter = 0\n",
    "    neg_counter = 0\n",
    "    tricky_pairs = set()\n",
    "\n",
    "    for src, dst in neg_edges:\n",
    "        src_point_to = graph[src]\n",
    "        dst_point_to = graph[dst]\n",
    "        src_dst_both_point_to = src_point_to.intersection(dst_point_to)\n",
    "\n",
    "        for v in src_dst_both_point_to:\n",
    "            if (dst, v) in pos_edges:\n",
    "                if (src, v) in neg_edges:\n",
    "                    neg_counter += 1\n",
    "                else:\n",
    "                    print(\"NEG-Tricky\", src, \"|\", v)\n",
    "                    tricky_pairs.add(same_order(src, v))\n",
    "                counter += 1\n",
    "    print('neg_counter', neg_counter, 'counter', counter)\n",
    "    print(\"Triangle case\", neg_counter / counter)\n",
    "\n",
    "def augments():\n",
    "    counter = 0\n",
    "    neg_counter = 0\n",
    "\n",
    "    for src, dst in pos_edges:\n",
    "        src_point_to = graph[src]\n",
    "        dst_point_to = graph[dst]\n",
    "\n",
    "        dst_outs = dst_point_to - src_point_to\n",
    "        for v in dst_outs:\n",
    "            if (dst, v) in pos_edges:\n",
    "                pos_augments.add((src, v))\n",
    "\n",
    "    for src, dst in neg_edges:\n",
    "        src_point_to = graph[src]\n",
    "        dst_point_to = graph[dst]\n",
    "\n",
    "        dst_outs = dst_point_to - src_point_to\n",
    "        for v in dst_outs:\n",
    "            if (dst, v) in pos_edges:\n",
    "                neg_augments.add((src, v))    \n",
    "\n",
    "    print(\"Augmented pos cases\", len(pos_augments))\n",
    "    print(\"Augmented neg cases\", len(neg_augments))\n",
    "    \n",
    "def add_augmented_links():\n",
    "    for src, dst in pos_augments:\n",
    "        graph[src].add(dst)\n",
    "        pos_edges.add(same_order(src, dst))\n",
    "        edges.add(same_order(src, dst))\n",
    "    \n",
    "    for src, dst in neg_augments:\n",
    "        graph[src].add(dst)\n",
    "        neg_edges.add(same_order(src, dst))\n",
    "        edges.add(same_order(src, dst))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {
    "_uuid": "f995ea66397041e930e74faec520ee7d92d818a2",
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "In interation 1\n",
      "POS-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？\n",
      "POS-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？\n",
      "POS-Tricky “十年鸡头胜砒霜”网上疯传的农产品谣言，真相到底是什么？ | 十年鸡头如砒霜？医生建议：这8种肉最好别吃\n",
      "POS-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "pos_counter 556327 counter 556332\n",
      "Triangle case 0.999991012560845\n",
      "NCC 52816\n",
      "NEG-Tricky “十年鸡头胜砒霜”网上疯传的农产品谣言，真相到底是什么？ | 农村俗话讲“十年鸡头胜砒霜”，这句话能相信吗？\n",
      "NEG-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 「币圈下午茶」币安3天内流入5万枚BTC /市值前30的币种全部下跌\n",
      "NEG-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 3天内币安流入5万枚BTC，在密谋些什么？\n",
      "NEG-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 「迅解区块链」3天内流入5万枚BTC，币安到底在密谋些什么呢？\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂终于回应，称50亿捐款已办妥！\n",
      "neg_counter 2492 counter 2497\n",
      "Triangle case 0.9979975971165399\n",
      "Augmented pos cases 484526\n",
      "Augmented neg cases 7995\n",
      "In interation 2\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？\n",
      "POS-Tricky “十年鸡头胜砒霜”网上疯传的农产品谣言，真相到底是什么？ | 十年鸡头如砒霜？医生建议：这8种肉最好别吃\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "pos_counter 15022197 counter 15022206\n",
      "Triangle case 0.999999400886927\n",
      "NCC 0\n",
      "NEG-Tricky “十年鸡头胜砒霜”网上疯传的农产品谣言，真相到底是什么？ | 农村俗话讲“十年鸡头胜砒霜”，这句话能相信吗？\n",
      "NEG-Tricky “十年鸡头胜砒霜”网上疯传的农产品谣言，真相到底是什么？ | “十年鸡头胜砒霜”网上疯传的农产品谣言，真相到底是什么？\n",
      "NEG-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 「币圈下午茶」币安3天内流入5万枚BTC /市值前30的币种全部下跌\n",
      "NEG-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 3天内币安流入5万枚BTC，在密谋些什么？\n",
      "NEG-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 「迅解区块链」3天内流入5万枚BTC，币安到底在密谋些什么呢？\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂终于回应，称50亿捐款已办妥！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂发声辟谣：我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传 发嫂：大吉大利！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传发嫂发声：大吉大利我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂终于回应，称50亿捐款已办妥！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂发声辟谣：我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 一代影视巨星周润发，发哥因病在香港去世，发嫂做出回应！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传 发嫂：大吉大利！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传发嫂发声：大吉大利我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂终于回应，称50亿捐款已办妥！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂发声辟谣：我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传 发嫂：大吉大利！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传发嫂发声：大吉大利我老公很好\n",
      "neg_counter 116396 counter 116414\n",
      "Triangle case 0.999845379421719\n",
      "Augmented pos cases 648756\n",
      "Augmented neg cases 17396\n",
      "In interation 3\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？\n",
      "POS-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？\n",
      "POS-Tricky “十年鸡头胜砒霜”网上疯传的农产品谣言，真相到底是什么？ | 十年鸡头如砒霜？医生建议：这8种肉最好别吃\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "pos_counter 24991734 counter 24991744\n",
      "Triangle case 0.9999995998678604\n",
      "NCC 0\n",
      "NEG-Tricky “十年鸡头胜砒霜”网上疯传的农产品谣言，真相到底是什么？ | 农村俗话讲“十年鸡头胜砒霜”，这句话能相信吗？\n",
      "NEG-Tricky “十年鸡头胜砒霜”网上疯传的农产品谣言，真相到底是什么？ | “十年鸡头胜砒霜”网上疯传的农产品谣言，真相到底是什么？\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂终于回应，称50亿捐款已办妥！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂发声辟谣：我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 一代影视巨星周润发，发哥因病在香港去世，发嫂做出回应！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传 发嫂：大吉大利！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传发嫂发声：大吉大利我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂终于回应，称50亿捐款已办妥！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂发声辟谣：我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 一代影视巨星周润发，发哥因病在香港去世，发嫂做出回应！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传 发嫂：大吉大利！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传发嫂发声：大吉大利我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂终于回应，称50亿捐款已办妥！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂发声辟谣：我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 一代影视巨星周润发，发哥因病在香港去世，发嫂做出回应！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传 发嫂：大吉大利！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传发嫂发声：大吉大利我老公很好\n",
      "NEG-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 「币圈下午茶」币安3天内流入5万枚BTC /市值前30的币种全部下跌\n",
      "NEG-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 3天内币安流入5万枚BTC，在密谋些什么？\n",
      "NEG-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 「迅解区块链」3天内流入5万枚BTC，币安到底在密谋些什么呢？\n",
      "neg_counter 473543 counter 473563\n",
      "Triangle case 0.9999577669708148\n",
      "Augmented pos cases 704240\n",
      "Augmented neg cases 18891\n",
      "In interation 4\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？\n",
      "POS-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？\n",
      "POS-Tricky “十年鸡头胜砒霜”网上疯传的农产品谣言，真相到底是什么？ | 十年鸡头如砒霜？医生建议：这8种肉最好别吃\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "pos_counter 33549228 counter 33549238\n",
      "Triangle case 0.999999701930637\n",
      "NCC 0\n",
      "NEG-Tricky “十年鸡头胜砒霜”网上疯传的农产品谣言，真相到底是什么？ | 农村俗话讲“十年鸡头胜砒霜”，这句话能相信吗？\n",
      "NEG-Tricky “十年鸡头胜砒霜”网上疯传的农产品谣言，真相到底是什么？ | “十年鸡头胜砒霜”网上疯传的农产品谣言，真相到底是什么？\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂终于回应，称50亿捐款已办妥！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂发声辟谣：我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 一代影视巨星周润发，发哥因病在香港去世，发嫂做出回应！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传 发嫂：大吉大利！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传发嫂发声：大吉大利我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂终于回应，称50亿捐款已办妥！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂发声辟谣：我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 一代影视巨星周润发，发哥因病在香港去世，发嫂做出回应！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传 发嫂：大吉大利！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传发嫂发声：大吉大利我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂终于回应，称50亿捐款已办妥！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂发声辟谣：我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 一代影视巨星周润发，发哥因病在香港去世，发嫂做出回应！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传 发嫂：大吉大利！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传发嫂发声：大吉大利我老公很好\n",
      "NEG-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 「币圈下午茶」币安3天内流入5万枚BTC /市值前30的币种全部下跌\n",
      "NEG-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 3天内币安流入5万枚BTC，在密谋些什么？\n",
      "NEG-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 「迅解区块链」3天内流入5万枚BTC，币安到底在密谋些什么呢？\n",
      "neg_counter 613990 counter 614010\n",
      "Triangle case 0.9999674272405987\n",
      "Augmented pos cases 707106\n",
      "Augmented neg cases 19091\n",
      "In interation 5\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？\n",
      "POS-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？\n",
      "POS-Tricky “十年鸡头胜砒霜”网上疯传的农产品谣言，真相到底是什么？ | 十年鸡头如砒霜？医生建议：这8种肉最好别吃\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "POS-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂首次回应，50亿捐款已经办妥！\n",
      "pos_counter 34308750 counter 34308760\n",
      "Triangle case 0.9999997085292502\n",
      "NCC 0\n",
      "NEG-Tricky “十年鸡头胜砒霜”网上疯传的农产品谣言，真相到底是什么？ | 农村俗话讲“十年鸡头胜砒霜”，这句话能相信吗？\n",
      "NEG-Tricky “十年鸡头胜砒霜”网上疯传的农产品谣言，真相到底是什么？ | “十年鸡头胜砒霜”网上疯传的农产品谣言，真相到底是什么？\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂终于回应，称50亿捐款已办妥！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂发声辟谣：我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 一代影视巨星周润发，发哥因病在香港去世，发嫂做出回应！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传 发嫂：大吉大利！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传发嫂发声：大吉大利我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂终于回应，称50亿捐款已办妥！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂发声辟谣：我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 一代影视巨星周润发，发哥因病在香港去世，发嫂做出回应！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传 发嫂：大吉大利！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传发嫂发声：大吉大利我老公很好\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂终于回应，称50亿捐款已办妥！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？发嫂发声辟谣：我老公很好\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 一代影视巨星周润发，发哥因病在香港去世，发嫂做出回应！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传 发嫂：大吉大利！\n",
      "NEG-Tricky 周润发因病去世？发嫂发声辟谣：我老公很好 | 周润发因病去世？香港新闻截图疯传发嫂发声：大吉大利我老公很好\n",
      "NEG-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 「币圈下午茶」币安3天内流入5万枚BTC /市值前30的币种全部下跌\n",
      "NEG-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 3天内币安流入5万枚BTC，在密谋些什么？\n",
      "NEG-Tricky 传言币安三天流入5万枚比特币！官微辟谣：还能再假一点吗？ | 「迅解区块链」3天内流入5万枚BTC，币安到底在密谋些什么呢？\n",
      "neg_counter 649842 counter 649862\n",
      "Triangle case 0.9999692242352992\n",
      "Augmented pos cases 707106\n",
      "Augmented neg cases 19091\n",
      "Finished\n"
     ]
    }
   ],
   "source": [
    "i = 0\n",
    "\n",
    "while(True):\n",
    "    i += 1\n",
    "    print(\"In interation\", i)\n",
    "    \n",
    "    pos_size = len(pos_augments)\n",
    "    neg_size = len(neg_augments)\n",
    "    \n",
    "    add_pos_edges()\n",
    "    add_neg_edges()\n",
    "    augments()\n",
    "    add_augmented_links()\n",
    "    \n",
    "    if len(pos_augments) == pos_size and len(neg_augments) == neg_size:\n",
    "        print(\"Finished\")\n",
    "        break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "test = pd.read_csv(\"../data/dataset/test.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "neg_samples = set([v[0] for v in neg_augments])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def mark_neg(row):\n",
    "    if (row['title2_zh'], row['title1_zh']) in neg_augments:\n",
    "        return 1\n",
    "    return 0\n",
    "\n",
    "def mark_pos(row):\n",
    "    if (row['title1_zh'], row['title2_zh']) in pos_augments:\n",
    "        return 1\n",
    "    if (row['title2_zh'], row['title1_zh']) in pos_augments:\n",
    "        return 1    \n",
    "    return 0\n",
    "\n",
    "def mark(row):\n",
    "    if (row['title1_zh'], row['title2_zh']) in pos_augments:\n",
    "        return 'agreed'\n",
    "    if (row['title2_zh'], row['title1_zh']) in pos_augments:\n",
    "        return 'agreed'\n",
    "    if (row['title2_zh'], row['title1_zh']) in neg_augments:\n",
    "        return 'disagreed'\n",
    "    return 'failed'\n",
    "        \n",
    "test['mark_neg'] = test.apply(lambda row: mark_neg(row), axis=1)\n",
    "test['mark_pos'] = test.apply(lambda row: mark_pos(row), axis=1)\n",
    "test['deal_with_the_devil'] = test.apply(lambda row: mark(row), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "failed       73238\n",
       "agreed        6774\n",
       "disagreed      114\n",
       "Name: deal_with_the_devil, dtype: int64"
      ]
     },
     "execution_count": 119,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test['deal_with_the_devil'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "114"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test['mark_neg'].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6774"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test['mark_pos'].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "best_predictions = pd.read_csv('../data/high_ground/final_answer.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Check improvement"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "labeled = test['deal_with_the_devil'] != 'failed'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "agreed       5956\n",
       "unrelated     843\n",
       "disagreed      89\n",
       "Name: Category, dtype: int64"
      ]
     },
     "execution_count": 126,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "best_predictions[labeled]['Category'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Difference 0\n"
     ]
    }
   ],
   "source": [
    "print(\"Difference\", (best_predictions[labeled]['Category'] != test[labeled]['deal_with_the_devil']).sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "unrelated    51536\n",
       "agreed       26490\n",
       "disagreed     2100\n",
       "Name: Category, dtype: int64"
      ]
     },
     "execution_count": 143,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "best_predictions['Category'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "unrelated    0.632666\n",
       "agreed       0.340813\n",
       "disagreed    0.026521\n",
       "Name: Category, dtype: float64"
      ]
     },
     "execution_count": 148,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "best_predictions['Category'].value_counts() / len(best_predictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6888"
      ]
     },
     "execution_count": 138,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len((test[labeled]['deal_with_the_devil']).values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "best_predictions['Fake'] = test['deal_with_the_devil'].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def deal_with_the_devil(row):\n",
    "    if row['Fake'] != 'failed' and row['Fake'] != row['Category']:\n",
    "        return row['Fake']\n",
    "    return row['Category']\n",
    "    \n",
    "\n",
    "best_predictions['Category'] = best_predictions[['Category', 'Fake']].apply(lambda row: deal_with_the_devil(row), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "best_predictions = best_predictions.drop(['Fake'], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "best_predictions.to_csv(\"../data/high_ground/final_answer.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Category</th>\n",
       "      <th>Id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321187</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321190</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321189</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321193</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321191</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321194</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321192</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321197</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321195</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>agreed</td>\n",
       "      <td>321199</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>agreed</td>\n",
       "      <td>321198</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>agreed</td>\n",
       "      <td>321201</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321196</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>agreed</td>\n",
       "      <td>321200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321203</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>agreed</td>\n",
       "      <td>321202</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>agreed</td>\n",
       "      <td>321206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321204</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>agreed</td>\n",
       "      <td>321207</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321208</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321205</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>agreed</td>\n",
       "      <td>321209</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>agreed</td>\n",
       "      <td>321211</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321210</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321214</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>agreed</td>\n",
       "      <td>321213</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321215</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321212</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>agreed</td>\n",
       "      <td>321218</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>321220</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80096</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>401533</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80097</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>401536</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80098</th>\n",
       "      <td>agreed</td>\n",
       "      <td>401538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80099</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>401539</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80100</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>401537</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80101</th>\n",
       "      <td>agreed</td>\n",
       "      <td>401540</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80102</th>\n",
       "      <td>agreed</td>\n",
       "      <td>401541</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80103</th>\n",
       "      <td>agreed</td>\n",
       "      <td>401544</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80104</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>401543</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80105</th>\n",
       "      <td>agreed</td>\n",
       "      <td>401546</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80106</th>\n",
       "      <td>agreed</td>\n",
       "      <td>401542</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80107</th>\n",
       "      <td>agreed</td>\n",
       "      <td>401545</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80108</th>\n",
       "      <td>agreed</td>\n",
       "      <td>401547</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80109</th>\n",
       "      <td>agreed</td>\n",
       "      <td>401549</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80110</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>401551</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80111</th>\n",
       "      <td>agreed</td>\n",
       "      <td>401548</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80112</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>401552</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80113</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>401553</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80114</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>401554</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80115</th>\n",
       "      <td>agreed</td>\n",
       "      <td>401550</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80116</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>401556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80117</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>401557</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80118</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>401558</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80119</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>401555</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80120</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>401561</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80121</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>401559</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80122</th>\n",
       "      <td>agreed</td>\n",
       "      <td>401560</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80123</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>401562</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80124</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>401563</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80125</th>\n",
       "      <td>unrelated</td>\n",
       "      <td>401564</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>80126 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        Category      Id\n",
       "0      unrelated  321187\n",
       "1      unrelated  321190\n",
       "2      unrelated  321189\n",
       "3      unrelated  321193\n",
       "4      unrelated  321191\n",
       "5      unrelated  321194\n",
       "6      unrelated  321192\n",
       "7      unrelated  321197\n",
       "8      unrelated  321195\n",
       "9         agreed  321199\n",
       "10        agreed  321198\n",
       "11        agreed  321201\n",
       "12     unrelated  321196\n",
       "13        agreed  321200\n",
       "14     unrelated  321203\n",
       "15        agreed  321202\n",
       "16        agreed  321206\n",
       "17     unrelated  321204\n",
       "18        agreed  321207\n",
       "19     unrelated  321208\n",
       "20     unrelated  321205\n",
       "21        agreed  321209\n",
       "22        agreed  321211\n",
       "23     unrelated  321210\n",
       "24     unrelated  321214\n",
       "25        agreed  321213\n",
       "26     unrelated  321215\n",
       "27     unrelated  321212\n",
       "28        agreed  321218\n",
       "29     unrelated  321220\n",
       "...          ...     ...\n",
       "80096  unrelated  401533\n",
       "80097  unrelated  401536\n",
       "80098     agreed  401538\n",
       "80099  unrelated  401539\n",
       "80100  unrelated  401537\n",
       "80101     agreed  401540\n",
       "80102     agreed  401541\n",
       "80103     agreed  401544\n",
       "80104  unrelated  401543\n",
       "80105     agreed  401546\n",
       "80106     agreed  401542\n",
       "80107     agreed  401545\n",
       "80108     agreed  401547\n",
       "80109     agreed  401549\n",
       "80110  unrelated  401551\n",
       "80111     agreed  401548\n",
       "80112  unrelated  401552\n",
       "80113  unrelated  401553\n",
       "80114  unrelated  401554\n",
       "80115     agreed  401550\n",
       "80116  unrelated  401556\n",
       "80117  unrelated  401557\n",
       "80118  unrelated  401558\n",
       "80119  unrelated  401555\n",
       "80120  unrelated  401561\n",
       "80121  unrelated  401559\n",
       "80122     agreed  401560\n",
       "80123  unrelated  401562\n",
       "80124  unrelated  401563\n",
       "80125  unrelated  401564\n",
       "\n",
       "[80126 rows x 2 columns]"
      ]
     },
     "execution_count": 158,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "best_predictions"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
